clear
pwd

use EstimationSample.dta

/* Install necessary packages
ssc install erepost
* Install ftools (remove program if it existed previously)
cap ado uninstall ftools
net install ftools, from("https://raw.githubusercontent.com/sergiocorreia/ftools/master/src/")
* Install reghdfe 6.x
cap ado uninstall reghdfe
net install reghdfe, from("https://raw.githubusercontent.com/sergiocorreia/reghdfe/master/src/")
* Install parallel, if using the parallel() option; don't install from SSC
cap ado uninstall parallel
net install parallel, from(https://raw.github.com/gvegayon/parallel/stable/) replace
mata mata mlib index
cap ado uninstall ivreghdfe
cap ssc install ivreg2 // Install ivreg2, the core package
net install ivreghdfe, from(https://raw.githubusercontent.com/sergiocorreia/ivreghdfe/master/src/)
ssc install ranktest, replace
ssc install egenmore
*/
set matsize 1000
set more off


// Regressions
// housekeeping-----------------------------------------------------------------
drop BatterykWh	// this is from EV specification
ren BatterykWh_sales Batterykwh
gen logQ=log(sales+0.00)
replace PriceUSD=PriceUSD/1000
replace price_post_s=price_post_s/1000
replace incentive=incentive/1000
replace RealIncome_new=RealIncome_new/1000
gen Price_over_Income=price_post_s/log(RealIncome_new)
*replace charging_station=charging_station/1000 //EV-volume data 
*replace charging_IEA=charging_IEA/1000
gen log_charging=log(charging_IEA+0.01)
gen log_charging_slow=log(N_charger_slow+0.01) 
gen log_charging_fast=log(N_charger_fast+0.01)
egen icountry=group(country)
egen imodel=group(MakeModel Propulsion)
egen ibrand=group(Brand)
egen iregion=group(region)
egen icountryYear=group(country year)
egen icountrymodel=group(country MakeModel Propulsion)
egen icountrybrand=group(country Brand)
egen iPropulsion=group(Propulsion)

replace GlobalSegment="MPV-B" if  GlobalSegment=="MPV-A"
replace GlobalSegment="MPV-D" if  GlobalSegment=="MPV-E"
replace GlobalSegment="SUV-D" if  GlobalSegment=="PUP-D"  // pickup to SUV
replace GlobalSegment="SUV-B" if  GlobalSegment=="SUV-A"
replace GlobalSegment="SUV-E" if  GlobalSegment=="SUV-F"



// construt IVs-----------------------------------------------------------------
* 1. berry IV for price 
/*gen a=1
egen own_modelcount=total(a), by(country year GlobalSegment  Brand)
egen tot_modelcount=total(a), by(country year GlobalSegment  )
drop a*/
egen own_modelcount=nvals(imodel), by(country year GlobalSegment  Brand)		// since we do not have firm info, we use the Brand in place of firm as rough approx for now
egen tot_modelcount=nvals(imodel), by(country year GlobalSegment  )
gen  rival_modelcount=tot_modelcount-own_modelcount
tab own_modelcount
tab rival_modelcount

* sum of own and rival attributes 
foreach var in Batterykwh EngineHP Size Range {
		egen own_sum_`var'=total(`var'), by(country year GlobalSegment  Brand)
		egen tot_sum_`var'=total(`var'), by(country year GlobalSegment )
		gen  rival_sum_`var'=tot_sum_`var'-own_sum_`var'
		gen own_mean_`var'=own_sum_`var'/own_modelcount
		gen rival_mean_`var'=rival_sum_`var'/rival_modelcount
		
		*replace own_sum_`var'=own_sum_`var'-`var'
		
		gen own_diff_`var'=abs(`var'-own_mean_`var')
		gen rival_diff_`var'=abs(`var'-rival_mean_`var')
}



* mean of own and rival attributes
foreach var in Batterykwh EngineHP Size Range {
  bysort country year GlobalSegment Brand: egen `var'_own= mean(`var')
  bysort country year GlobalSegment: egen `var'_all= mean(`var')
  gen `var'_rival=(`var'_all*tot_modelcount-`var'_own*own_modelcount)/rival_modelcount
  }

* 2. battery supplier IV for price
bysort CellSupplier: egen supplier_total=total(sales)
preserve 
duplicates drop supplier supplier_total, force
keep CellSupplier supplier_total
gsort -supplier_total
list 
restore
* CATL, LG Energy Solution, Panasonic, BYD, Samsung SDI are top five, group other suppliers into "other"
gen supplier_group=CellSupplier 
replace supplier_group="other" if !inlist(CellSupplier,"CATL","LG Energy Solution","Panasonic","BYD","Samsung SDI")
egen isupplier=group(supplier_group)

* gen global sales of each battery supplier 
egen supplier_sales=total(sales), by(CellSupplier year)
egen supplier_sales_cumulative=total(sales), by(CellSupplier)
replace supplier_sales=supplier_sales/10^5
replace supplier_sales_cumulative=supplier_sales_cumulative/10^5
gen log_supplier_cumsales=log(supplier_sales_cumulative)

* 3. charging station IV 
* use same period HEV sales 
preserve
import excel "$RawData\ChargingStationIV\ConstructionUnitLaborCost_Cleaned.xlsx", sheet("Sheet1") firstrow clear
ren (B-L) (LaborCost#), addnum(2010)
reshape long LaborCost, i(SalesCountry) j(year)
ipolate LaborCost year, by(SalesCountry) gen(LaborCost_epo) epolate
replace SalesCountry="Switzerland" if SalesCountry=="Euro Area"
tempfile ChargingIV
save `ChargingIV', replace
restore

merge m:1 SalesCountry year using `ChargingIV', keepus(LaborCost_epo) keep(1 3) nogen
gen logLC=log(LaborCost_epo)


* gen number of EV models 
bysort country year: gen N_model=_N


// define globals---------------------------------------------------------------
global y "logQ" 
global x "price_post_s"
global w "incentive"

global N "log_charging"
gen NXfast=$N* (Fast_charging==1)
global NXfast "$N NXfast"
*global N_count "charging_IEA"
*global N_density_sales "charging_density_carsales" 
*global N_density_area "charging_density_area"  //global N "charging_density_urban_area" 
*global N_population "charging_density_population"

*gen log_N_fast=log(N_charger_fast)
*gen log_N_slow=log(N_charger_slow)
*global N_sep "log_N_fast log_N_slow" 

gen NXPHEV=$N*(Propulsion=="PHEV")
global N_PHEV "$N NXPHEV"
gen RangeXPHEV=Range*(Propulsion=="PHEV")
gen NXRange=$N* Range //gen NXRangeXPHEV=NXRange*(Propulsion=="PHEV")
global NXRange "$N NXRange"
*global N_Range_PHEV "$N Range NXPHEV NXRange NXRangeXPHEV"
*gen PHEV=(Propulsion=="PHEV")
*global PHEV "PHEV"
 
global alpha "Size Range RangeXPHEV"
global Size "Size"
global Capacity "Batterykwh"
global z "Population RealIncome UrbanPercentage" 

global IV_supplier "isupplier#c.Batterykwh#modelcount"
global NFI "GreenPlate Parking HOV"

global countryFE "icountry"
global modelFE "imodel"
global brandFE "ibrand"
global regionFE "iregion"
global yearFE "year"
global countryYearFE "icountryYear"
global countryModelFE "icountrymodel"
global countryBrandFE "icountrybrand"
global FuelTypeFE "iPropulsion"

label variable price_post_s "Price - Incentive (1,000 USD)"
*label variable year "Year"
label variable logQ "log(sales)"
label variable PriceUSD "MSRP (1,000 USD)"
label variable charging_station "Number of charging Ports (1,000)"
label variable charging_IEA "Number of EV charger (1,000)"
label variable incentive "Incentive (1,000 USD)"
label variable Batterykwh "Battery capacity (kWh)"
label variable Size "Vehicle size (m3)"
label variable Range "Range (miles)"
label variable NFI "Indicator of Non-financial Incentives"
label variable charging_density_urban_area "N(1/sq km)"
label variable EngineHP "Engine Horsepower"
label variable log_charging "Log Charging Ports"
label variable NXfast "1(fast charger)$\times$ Log Charging Station"
label variable RangeXPHEV "1(PHEV) $\times$ Range"
label variable NXRange "Log Charging Station $\times$ Range"


// Create interactions for Price IVs
gen Post16=(year>2016)
gen Battery_over_Income=Batterykwh/RealIncome_new
foreach var of varlist own_modelcount rival_modelcount own_sum_Batterykwh rival_sum_Batterykwh own_sum_Size rival_sum_Size own_sum_Range rival_sum_Range {
	gen `var'_over_y=`var'/log(RealIncome_new)
	gen `var'_Post16=`var'*Post16
}
gen Price_Post16 = $x * Post16



gen log_fast=log(N_charger_fast)
gen log_slow=log(N_charger_slow)
gen fastXfast_charging=log_fast*(Fast_charging)
gen PHEV=(Pro=="PHEV")


assert price_post_s>0	

// Baseline specification-------------------------------------------------------
forvalues t=1/1{
	
	foreach dep of varlist logQ delta {
		*local outcome $y		
		*local OLS_varlist delta $x $w $N $alpha NFI 
		*local IV_varlist delta $w $alpha NFI  

		local controls $alpha NFI 
		local fullfes  $countryFE $yearFE $brandFE  $FuelTypeFE 
		local priceiv isupplier#c.Batterykwh
		local blpiv  own_modelcount rival_modelcount own_sum_Batterykwh rival_sum_Batterykwh own_sum_Size rival_sum_Size own_sum_Range rival_sum_Range
	
		// try different sets of charging station IVs
		local stationiv accum_HEV_sales i.Fast_charging#c.accum_HEV_sales LaborCost_epo i.Fast_charging#c.LaborCost_epo c.accum_HEV_sales#c.LaborCost_epo i.Fast_charging#c.accum_HEV_sales#c.LaborCost_epo
		
		*HEVsales_gasrate
		
		local T1 "OLS"
		quietly eststo `T1'1: reghdfe `dep' $x $N `controls' , absorb($countryFE $yearFE) vce(cluster $countryYearFE)
		quietly eststo `T1'3: reghdfe `dep' $x $N `controls' , absorb(`fullfes') vce(cluster $countryYearFE)
		
		mat FirstStageF=J(4,2,.)
		mat TestStats=J(4,3,.)
		
		local T2 "IV"
		quietly eststo `T2'1: ivreghdfe `dep' $N `controls'  NFI ($x = `priceiv' ), absorb(`fullfes')  cluster ($countryYearFE) first
			mat temp_first=e(first)
			mat FirstStageF[1,1]=temp_first["F",1...]
			mat TestStats[1,1]=e(idstat)
			mat TestStats[1,2]=e(cdf)
			mat TestStats[1,3]=e(j)
			estadd scalar F_Price=FirstStageF[1,1]
			
		quietly eststo `T2'2: ivreghdfe `dep' `controls' ($x $N= `priceiv'  `stationiv'), absorb(`fullfes')  cluster ($countryYearFE) first
			mat temp_first=e(first)
			mat FirstStageF[2,1]=temp_first["F",1...]
			mat TestStats[2,1]=e(idstat)
			mat TestStats[2,2]=e(cdf)
			mat TestStats[2,3]=e(j)
			estadd scalar F_Price=FirstStageF[2,1]
			estadd scalar F_N=FirstStageF[2,2]
			
		quietly eststo `T2'3: ivreghdfe `dep' `controls' ($x $N= `blpiv' `stationiv') , absorb(`fullfes')  cluster ($countryYearFE) first
			mat temp_first=e(first)
			mat FirstStageF[3,1]=temp_first["F",1...]
			mat TestStats[3,1]=e(idstat)
			mat TestStats[3,2]=e(cdf)
			mat TestStats[3,3]=e(j)
			estadd scalar F_Price=FirstStageF[3,1]
			estadd scalar F_N=FirstStageF[3,2]
			
		quietly eststo `T2'4: ivreghdfe `dep' `controls' ($x $N= `priceiv' `blpiv'  `stationiv') , absorb(`fullfes')  cluster ($countryYearFE) first
			mat temp_first=e(first)
			mat FirstStageF[4,1]=temp_first["F",1...]
			mat TestStats[4,1]=e(idstat)
			mat TestStats[4,2]=e(cdf)
			mat TestStats[4,3]=e(j)
			estadd scalar F_Price=FirstStageF[4,1]
			estadd scalar F_N=FirstStageF[4,2]
		
		mat list FirstStageF
		mat list TestStats
		
		estfe OLS1 OLS3 IV1 IV2 IV3 IV4 ,labels($countryFE "Country FE" $yearFE "Year FE" $brandFE "Brand FE"   $FuelTypeFE "Fuel Type FE")
		return list
		
		* export to tex
		quietly esttab OLS1 OLS3 IV1 IV2 IV3 IV4 using "Baseline_Final_`dep'.tex",  /// 
			drop(_cons)	b(%9.3f) se(%9.3f) star(* 0.10 ** 0.05 *** 0.01) label compress nogaps nodepvars nonumber ///
			stats(F_Price F_N idstat cdf j N, fmt(2 2 2 2 2 0) label("First Stage F-stats for Price" "First Stage F-stats for Charging Station" "Underidentification Test" "Weak Identification Test" "Overidentification Test" "Observations")) ///
			order ($x $N Range RangeXPHEV Size NFI) ///
			mtitles("OLS" "OLS" "IV" "IV" "IV" "IV") ///
			mgroups("Dependent variable: `dep' ", pattern(1 0 0 0 0 0) ///
			prefix(\multicolumn{@span}{c}{) suffix(}) span) ///
			indicate(`r(indicate_fe)') replace
			
		
		// Heterogeneity analysis
		local priceiv_w_Inc isupplier#c.Batterykwh isupplier#c.Battery_over_Income
		local blpiv_w_Inc  own_modelcount rival_modelcount own_sum_Batterykwh rival_sum_Batterykwh own_sum_Size rival_sum_Size own_sum_Range rival_sum_Range own_modelcount_over_y rival_modelcount_over_y own_sum_Batterykwh_over_y rival_sum_Batterykwh_over_y own_sum_Size_over_y rival_sum_Size_over_y own_sum_Range_over_y rival_sum_Range_over_y
		local priceiv_w_Post isupplier#c.Batterykwh isupplier#c.Batterykwh#Post16
		local blpiv_w_Post own_modelcount rival_modelcount own_sum_Batterykwh rival_sum_Batterykwh own_sum_Size rival_sum_Size own_sum_Range rival_sum_Range own_modelcount_Post16 rival_modelcount_Post16 own_sum_Batterykwh_Post16 rival_sum_Batterykwh_Post16 own_sum_Size_Post16 rival_sum_Size_Post16 own_sum_Range_Post16 rival_sum_Range_over_y
		
		
		local T3 "Hgty"
		// 1. Income
		eststo `T3'1: ivreghdfe `dep' $alpha NFI ($x Price_over_Income $N= `priceiv_w_Inc' `blpiv_w_Inc'  `stationiv') , absorb(`fullfes')  cluster ($countryYearFE) first
		// 2. Gasoline Price
		eststo `T3'2: ivreghdfe `dep' `controls' GasPUSD ($x $N= `priceiv' `blpiv'  `stationiv') , absorb(`fullfes')  cluster ($countryYearFE) first
		// 3. Policy Complimentarity
		*eststo `T3'3: ivreghdfe `dep' `controls' ($x $N c.$w#c.$N = `priceiv' `blpiv'  `stationiv'  c.accum_HEV_sales#c.$w) , absorb(`fullfes')  cluster ($countryYearFE) first
		// 4. Charging stations - fast versus slow
		eststo `T3'4: ivreghdfe `dep' `controls' ($x fastXfast_charging c.log_slow#i.Fast_charging = `priceiv' `blpiv'  `stationiv') , absorb(`fullfes')  cluster ($countryYearFE) first
		// 5. Range x log Charging Stations
		eststo `T3'5: ivreghdfe `dep' `controls' ($x $N c.$N#c.Range = `priceiv' `blpiv'  `stationiv' c.accum_HEV_sales#c.Range i.Fast_charging#c.accum_HEV_sales#c.Range) , absorb(`fullfes')  cluster ($countryYearFE) first
		// 6. PHEV x log Charging Stations
		eststo `T3'6: ivreghdfe `dep' `controls' ($x $N c.$N#i.PHEV = `priceiv' `blpiv'  `stationiv' c.accum_HEV_sales#i.PHEV i.Fast_charging#c.accum_HEV_sales#i.PHEV) , absorb(`fullfes')  cluster ($countryYearFE) first
		// 7. Urbanization x log Charging Stations
		eststo `T3'7: ivreghdfe `dep' `controls' ($x $N c.$N#c.UrbanPercentage = `priceiv' `blpiv'  `stationiv' c.accum_HEV_sales#c.UrbanPercentage i.Fast_charging#c.accum_HEV_sales#c.UrbanPercentage) , absorb(`fullfes')  cluster ($countryYearFE) first
		// 8. Post x Price
		eststo `T3'8: ivreghdfe `dep' $alpha NFI ($x Price_Post16 $N= `priceiv_w_Post' `blpiv_w_Post'  `stationiv') , absorb(`fullfes')  cluster ($countryYearFE) first
		// 9. Post x Charging
		eststo `T3'9: ivreghdfe `dep' `controls' ($x $N c.$N#i.Post16 = `priceiv' `blpiv'  `stationiv' c.accum_HEV_sales#i.Post16 i.Fast_charging#c.accum_HEV_sales#i.Post16) , absorb(`fullfes')  cluster ($countryYearFE) first
		
		estfe Hgty1 Hgty7 Hgty5 Hgty6 Hgty8 Hgty9,labels($countryFE "Country FE" $yearFE "Year FE" $brandFE "Brand FE"   $FuelTypeFE "Fuel Type FE")
		return list
		
		* export to tex
		quietly esttab Hgty1 Hgty7 Hgty5 Hgty6 Hgty8 Hgty9 using "Baseline_Final_`dep'_Hgty.tex",  /// keep($varlist3)
			b(%9.3f) se(%9.3f) star(* 0.10 ** 0.05 *** 0.01) label compress nogaps nodepvars nonumber ///
			order ($x $N Range RangeXPHEV Size NFI) ///
			mtitles("(1)" "(2)" "(3)" "(4)" "(5)" "(6)") ///
			mgroups("Dependent variable: `dep' ", pattern(1 0 0 0 0 0) ///
			prefix(\multicolumn{@span}{c}{) suffix(}) span) ///
			indicate(`r(indicate_fe)') replace
	}	
}
